Given the results of the analysis of the Fluidigm data, here I explore the role of the V intercept in a more complex data (Allen).
We want to check if the intercept acts as a size factor here too and, if true, if W capture some interesting biology.
Interestingly, in this more complex dataset, both intercepts are needed to get a clear picture of the data in two dimensions.
As a first pass, I will only focus on the cells that passed the QC filters (by the original authors) and that were part of the “core” clusters. I will color-code the cells by either known cell type or by inferred cluster (inferred in the original study).
Select cells, remove ERCC spike-ins, filter out the genes that do not have at least 10 counts in at least 10 cells.
data("allen")
allen_core <- allen[grep("^ERCC-", rownames(allen), invert = TRUE),
which(colData(allen)$Core.Type=="Core")]
filter <- apply(assay(allen_core)>10, 1, sum)>=10
Number of retained genes:
print(sum(filter))
## [1] 11545
To speed up the computations, I will focus on the top 1,000 most variable genes.
allen_core <- allen_core[filter,]
core <- assay(allen_core)
vars <- rowVars(log1p(core))
names(vars) <- rownames(core)
vars <- sort(vars, decreasing = TRUE)
core <- core[names(vars)[1:1000],]
First, let’s look at PCA (of the log counts) for reference.
par(mfrow=c(1, 2))
bio <- as.factor(colData(allen_core)$driver_1_s)
cl <- as.factor(colData(allen_core)$Primary.Type)
detection_rate <- colSums(core>0)
coverage <- colSums(core)
pca <- prcomp(t(log1p(core)))
plot(pca$x, col=cols[bio], pch=19, main="PCA of log-counts, centered not scaled")
legend("bottomleft", levels(bio), fill=cols)
plot(pca$x, col=cols2[cl], pch=19, main="PCA of log-counts, centered not scaled")
df <- data.frame(PC1=pca$x[,1], PC2=pca$x[,2], detection_rate=detection_rate, coverage=coverage)
pairs(df, pch=19, col=cols[bio])
print(cor(df, method="spearman"))
## PC1 PC2 detection_rate coverage
## PC1 1.00000000 -0.01052424 0.8066812 0.4453172
## PC2 -0.01052424 1.00000000 -0.3601716 -0.3286485
## detection_rate 0.80668124 -0.36017158 1.0000000 0.5275845
## coverage 0.44531717 -0.32864852 0.5275845 1.0000000
print(system.time(zinb_Vall <- zinbFit(core, ncores = 3, K = 2)))
## user system elapsed
## 106.854 30.021 64.516
Plot the results with cells colored according to their biological condition.
par(mfrow=c(1, 2))
plot(zinb_Vall@W, col=cols[bio], pch=19, xlab="W1", ylab="W2", main="Both V intercepts")
legend("bottomright", levels(bio), fill=cols)
plot(zinb_Vall@W, col=cols2[cl], pch=19, xlab="W1", ylab="W2", main="Both V intercepts")
legend("topright", levels(cl), fill=cols2)
One interpretation of the model is that \(\gamma_mu\) will act as a “size factor”. Here, we check whether it captures sequencing depth and/or detection rate.
#total number of detected genes in the cell
df <- data.frame(W1=zinb_Vall@W[,1], W2=zinb_Vall@W[,2], gamma_mu = zinb_Vall@gamma_mu[1,], gamma_pi = zinb_Vall@gamma_pi[1,], detection_rate=detection_rate, coverage=coverage)
pairs(df, pch=19, col=cols[bio])
print(cor(df, method="spearman"))
## W1 W2 gamma_mu gamma_pi
## W1 1.00000000 0.03514491 -0.0843733 0.3598978
## W2 0.03514491 1.00000000 -0.3676352 0.3710653
## gamma_mu -0.08437330 -0.36763518 1.0000000 -0.3348686
## gamma_pi 0.35989777 0.37106533 -0.3348686 1.0000000
## detection_rate -0.35188976 -0.34026613 0.3712522 -0.9939701
## coverage -0.05250198 -0.05649406 0.8504471 -0.4846195
## detection_rate coverage
## W1 -0.3518898 -0.05250198
## W2 -0.3402661 -0.05649406
## gamma_mu 0.3712522 0.85044711
## gamma_pi -0.9939701 -0.48461953
## detection_rate 1.0000000 0.52758451
## coverage 0.5275845 1.00000000
\(\gamma_pi\) clearly captures the detection rate and \(\gamma_mu\) clearly captures the coverage. Interestingly, the two estimates are positively correlated.
par(mfrow=c(1, 2))
plot(rowMeans(getPi(zinb_Vall)), detection_rate, xlab="Average estimated Pi", ylab="detection rate", pch=19, col=cols[bio])
plot(rowMeans(log1p(getMu(zinb_Vall))), coverage, xlab="Average estimated log Mu", ylab="coverage", pch=19, col=cols[bio])
It is worth looking at the other estimates as well, namely, \(\beta\) and \(\alpha\).
par(mfrow=c(2, 2))
boxplot(zinb_Vall@beta_mu[1,], main="Beta_Mu")
boxplot(zinb_Vall@beta_pi[1,], main="Beta_Pi")
plot(t(zinb_Vall@alpha_mu), xlab="Alpha_Mu 1", ylab="Alpha_Mu 2", main="Alpha_Mu")
plot(t(zinb_Vall@alpha_pi), xlab="Alpha_Pi 1", ylab="Alpha_Pi 2", main="Alpha_Pi")
print(system.time(zinb_Vnone <- zinbFit(core, ncores = 3, K = 2, V=matrix(0, ncol=1, nrow=NROW(core)))))
## user system elapsed
## 120.871 30.694 59.448
par(mfrow=c(1, 2))
plot(zinb_Vnone@W, col=cols[bio], pch=19, xlab="W1", ylab="W2")
legend("topleft", levels(bio), fill=cols)
plot(zinb_Vnone@W, col=cols2[cl], pch=19, xlab="W1", ylab="W2")
df <- data.frame(W1 = zinb_Vnone@W[,1], W2 = zinb_Vnone@W[,2], detection_rate=detection_rate, coverage=coverage)
pairs(df, pch=19, col=cols[bio])
print(cor(df, method="spearman"))
## W1 W2 detection_rate coverage
## W1 1.000000000 0.005426967 -0.4105128 -0.1328474
## W2 0.005426967 1.000000000 0.8736250 0.5865832
## detection_rate -0.410512821 0.873624999 1.0000000 0.5275845
## coverage -0.132847434 0.586583172 0.5275845 1.0000000
par(mfrow=c(1, 2))
plot(rowMeans(getPi(zinb_Vnone)), detection_rate, xlab="Average estimated Pi", ylab="detection rate", pch=19, col=cols[bio])
plot(rowMeans(log1p(getMu(zinb_Vnone))), coverage, xlab="Average estimated log Mu", ylab="coverage", pch=19, col=cols[bio])
It is worth looking at the other estimates as well, namely, \(\beta\) and \(\alpha\).
par(mfrow=c(2, 2))
boxplot(zinb_Vnone@beta_mu[1,], main="Beta_Mu")
boxplot(zinb_Vnone@beta_pi[1,], main="Beta_Pi")
plot(t(zinb_Vnone@alpha_mu), xlab="Alpha_Mu 1", ylab="Alpha_Mu 2", main="Alpha_Mu")
plot(t(zinb_Vnone@alpha_pi), xlab="Alpha_Pi 1", ylab="Alpha_Pi 2", main="Alpha_Pi")
V <- cbind(rep(0, NROW(core)), rep(1, NROW(core)))
print(system.time(zinb_Vmu <- zinbFit(core, ncores = 3, K = 2, V=V, which_V_mu=2L, which_V_pi=1L)))
## user system elapsed
## 117.983 30.208 62.073
par(mfrow=c(1, 2))
plot(zinb_Vmu@W, col=cols[bio], pch=19, xlab="W1", ylab="W2")
plot(zinb_Vmu@W, col=cols2[cl], pch=19, xlab="W1", ylab="W2")
df <- data.frame(W1 = zinb_Vmu@W[,1], W2 = zinb_Vmu@W[,2], gamma_mu = zinb_Vmu@gamma_mu[1,], detection_rate=detection_rate, coverage=coverage)
pairs(df, pch=19, col=cols[bio])
print(cor(df, method="spearman"))
## W1 W2 gamma_mu detection_rate coverage
## W1 1.00000000 -0.06020466 0.0485493 -0.2663277 -0.1083201
## W2 -0.06020466 1.00000000 0.5593379 0.8898885 0.3772103
## gamma_mu 0.04854930 0.55933792 1.0000000 0.5880261 0.9235989
## detection_rate -0.26632771 0.88988849 0.5880261 1.0000000 0.5275845
## coverage -0.10832007 0.37721026 0.9235989 0.5275845 1.0000000
par(mfrow=c(1, 2))
plot(rowMeans(getPi(zinb_Vmu)), detection_rate, xlab="Average estimated Pi", ylab="detection rate", pch=19, col=cols[bio])
plot(rowMeans(log1p(getMu(zinb_Vmu))), coverage, xlab="Average estimated log Mu", ylab="coverage", pch=19, col=cols[bio])
It is worth looking at the other estimates as well, namely, \(\beta\) and \(\alpha\).
par(mfrow=c(2, 2))
boxplot(zinb_Vmu@beta_mu[1,], main="Beta_Mu")
boxplot(zinb_Vmu@beta_pi[1,], main="Beta_Pi")
plot(t(zinb_Vmu@alpha_mu), xlab="Alpha_Mu 1", ylab="Alpha_Mu 2", main="Alpha_Mu")
plot(t(zinb_Vmu@alpha_pi), xlab="Alpha_Pi 1", ylab="Alpha_Pi 2", main="Alpha_Pi")
print(system.time(zinb_Vpi <- zinbFit(core, ncores = 3, K = 2, V=V, which_V_mu=1L, which_V_pi=2L)))
## user system elapsed
## 152.040 40.292 80.801
par(mfrow=c(1, 2))
plot(zinb_Vpi@W, col=cols[bio], pch=19, xlab="W1", ylab="W2")
plot(zinb_Vpi@W, col=cols2[cl], pch=19, xlab="W1", ylab="W2")
df <- data.frame(W1 = zinb_Vpi@W[,1], W2 = zinb_Vpi@W[,2], gamma_pi = zinb_Vpi@gamma_pi[1,], detection_rate=detection_rate, coverage=coverage)
pairs(df, pch=19, col=cols[bio])
print(cor(df, method="spearman"))
## W1 W2 gamma_pi detection_rate
## W1 1.00000000 0.1305085 0.09593638 -0.09884142
## W2 0.13050848 1.0000000 -0.37656124 0.27518468
## gamma_pi 0.09593638 -0.3765612 1.00000000 -0.98667190
## detection_rate -0.09884142 0.2751847 -0.98667190 1.00000000
## coverage 0.15676103 -0.2568486 -0.47896136 0.52758451
## coverage
## W1 0.1567610
## W2 -0.2568486
## gamma_pi -0.4789614
## detection_rate 0.5275845
## coverage 1.0000000
par(mfrow=c(1, 2))
plot(rowMeans(getPi(zinb_Vpi)), detection_rate, xlab="Average estimated Pi", ylab="detection rate", pch=19, col=cols[bio])
plot(rowMeans(log1p(getMu(zinb_Vpi))), coverage, xlab="Average estimated log Mu", ylab="coverage", pch=19, col=cols[bio])
It is worth looking at the other estimates as well, namely, \(\beta\) and \(\alpha\).
par(mfrow=c(2, 2))
boxplot(zinb_Vpi@beta_mu[1,], main="Beta_Mu")
boxplot(zinb_Vpi@beta_pi[1,], main="Beta_Pi")
plot(t(zinb_Vpi@alpha_mu), xlab="Alpha_Mu 1", ylab="Alpha_Mu 2", main="Alpha_Mu")
plot(t(zinb_Vpi@alpha_pi), xlab="Alpha_Pi 1", ylab="Alpha_Pi 2", main="Alpha_Pi")
print(system.time(zinb_3 <- zinbFit(core, ncores = 3, K = 3)))
## user system elapsed
## 121.030 32.532 61.484
pairs(zinb_3@W, col=cols[bio], pch=19)
pairs(zinb_3@W, col=cols2[cl], pch=19)
## write matrices to file to feed to ZIFA in python
write.csv(log1p(core), file="allen.csv")
coreAll <- assay(allen_core)
dim(coreAll)
## [1] 11545 285
print(system.time(zinb_all <- zinbFit(coreAll, ncores = 3, K = 2)))
## user system elapsed
## 1175.281 154.257 574.979
par(mfrow=c(1, 2))
plot(zinb_all@W, col=cols[bio], pch=19, xlab="W1", ylab="W2")
plot(zinb_all@W, col=cols2[cl], pch=19, xlab="W1", ylab="W2")
Interestingly, when using all the genes, things don’t work nicely anymore. I wonder if the difference is the selection of the most variable genes, rather than the complexity of the data.
Full algorithm was used. We got very similar results using block algorithm.
import sys
# modify python path to your python packages directory
sys.path.insert(1, '/usr/local/lib/python2.7/site-packages/')
from pandas import read_csv
from ZIFA import ZIFA
import numpy as np
df = read_csv('allen.csv')
del df['Unnamed: 0']
lc = np.array(df)
Y = np.transpose(lc)
Z, model_params = ZIFA.fitModel(Y, 2)
np.savetxt('allen_zifa.csv', Z, delimiter=',')
## Running zero-inflated factor analysis with N = 285, D = 1000, K = 2
## Param change below threshold 1.000e-02 after 6 iterations
par(mfrow=c(1, 2))
zifa_res <- read.csv("allen_zifa.csv", header=FALSE)
plot(zifa_res, col=cols[bio], pch=19, xlab="Z1", ylab="Z2")
plot(zifa_res, col=cols2[cl], pch=19, xlab="Z1", ylab="Z2")
wrapRzifa <- function(Y, block = F){
# wrapper R function for ZIFA.
# md5 hashing and temporary files are used not to re-run zifa
# if it has already be run on this computer.
d = digest(Y, "md5")
tmp = paste0(tempdir(), '/', d)
write.csv(Y, paste0(tmp, '.csv'))
if (!file.exists(paste0(tmp, '_zifa.csv'))){
print('run ZIFA')
bb = ifelse(block, '-b ', '')
cmd = sprintf('python run_zifa.py %s%s.csv %s_zifa.csv', bb, tmp, tmp)
system(cmd)
}
read.csv(sprintf("%s_zifa.csv", tmp), header=FALSE)
}
Y = log2(core + 1)
zifa_res = wrapRzifa(Y)
plot(zifa_res, col=cols[bio], pch=19, xlab="Z1", ylab="Z2")
plot(zifa_res, col=cols2[cl], pch=19, xlab="Z1", ylab="Z2")
df <- data.frame(Z1 = zifa_res$V1,
Z2 = zifa_res$V2,
detection_rate = detection_rate,
coverage = coverage)
pairs(df, pch=19, col=cols[bio])
print(cor(df, method="spearman"))
## Z1 Z2 detection_rate coverage
## Z1 1.00000000 0.07493093 -0.3890703 -0.2343373
## Z2 0.07493093 1.00000000 0.6859741 0.4423235
## detection_rate -0.38907034 0.68597407 1.0000000 0.5275845
## coverage -0.23433727 0.44232350 0.5275845 1.0000000
df <- data.frame(W1 = zinb_Vall@W[,1],
W2 = zinb_Vall@W[,2],
Z1 = zifa_res$V1,
Z2 = zifa_res$V2)
pairs(df, pch=19, col=cols[bio], main = 'Intercept in both Mu and Pi')
print(cor(df, method="spearman"))
## W1 W2 Z1 Z2
## W1 1.00000000 0.03514491 0.87737303 -0.06011083
## W2 0.03514491 1.00000000 -0.27471165 -0.77121359
## Z1 0.87737303 -0.27471165 1.00000000 0.07493093
## Z2 -0.06011083 -0.77121359 0.07493093 1.00000000
df <- data.frame(W1 = zinb_Vnone@W[,1],
W2 = zinb_Vnone@W[,2],
Z1 = zifa_res$V1,
Z2 = zifa_res$V2)
pairs(df, pch=19, col=cols[bio], main = 'No intercept')
print(cor(df, method="spearman"))
## W1 W2 Z1 Z2
## W1 1.000000000 0.005426967 0.940024468 0.11583976
## W2 0.005426967 1.000000000 -0.008162482 0.88366985
## Z1 0.940024468 -0.008162482 1.000000000 0.07493093
## Z2 0.115839757 0.883669851 0.074930925 1.00000000
df <- data.frame(W1 = zinb_Vmu@W[,1],
W2 = zinb_Vmu@W[,2],
Z1 = zifa_res$V1,
Z2 = zifa_res$V2)
pairs(df, pch=19, col=cols[bio], main = 'Intercept in Mu')
print(cor(df, method="spearman"))
## W1 W2 Z1 Z2
## W1 1.00000000 -0.06020466 0.93880730 0.27101971
## W2 -0.06020466 1.00000000 -0.21813102 0.85510842
## Z1 0.93880730 -0.21813102 1.00000000 0.07493093
## Z2 0.27101971 0.85510842 0.07493093 1.00000000
df <- data.frame(W1 = zinb_Vpi@W[,1],
W2 = zinb_Vpi@W[,2],
Z1 = zifa_res$V1,
Z2 = zifa_res$V2)
pairs(df, pch=19, col=cols[bio], main = 'Intercept in Pi')
print(cor(df, method="spearman"))
## W1 W2 Z1 Z2
## W1 1.0000000 0.1305085 0.88529862 0.37835330
## W2 0.1305085 1.0000000 0.13259032 0.63760776
## Z1 0.8852986 0.1325903 1.00000000 0.07493093
## Z2 0.3783533 0.6376078 0.07493093 1.00000000
library(biomaRt)
mart = useMart("ensembl")
mart = useDataset("hsapiens_gene_ensembl", mart = mart)
attrs = c("hgnc_symbol", "entrezgene")
bm = getBM(attributes=attrs, mart = mart)
coreGC = core
rownames(coreGC) = toupper(rownames(core))
bm = bm[match(rownames(coreGC), bm[,1]),]
bm = na.omit(bm)
gene_info = getGeneLengthAndGCContent(as.character(bm[,2]), "hg38", mode="org.db")
rownames(gene_info) = bm[,1]
gene_info = na.omit(gene_info)
coreGC = coreGC[rownames(gene_info),]
dim(core)
## [1] 1000 285
dim(coreGC)
## [1] 902 285
par(mfrow=c(1,2))
gene_detection_rate = rowSums(coreGC>0)/ncol(coreGC)
plot(gene_info[,'length'], gene_detection_rate, xlab = 'Gene Length',
ylab = 'Gene detection rate')
plot(gene_info[,'gc'], gene_detection_rate, xlab = 'GC',
ylab = 'Gene detection rate')
We keep intercept in both Vmu and Vpi and add GC and length to both Vmu and Vpi.
V = cbind(rep(1, NROW(coreGC)), gene_info)
print(system.time(zinb_gc <- zinbFit(coreGC, ncores = 3, K = 2, V = V)))
## user system elapsed
## 129.046 12.763 60.878
plot(zinb_gc@W, col=cols[bio], pch=19, xlab="W1", ylab="W2")
legend("topleft", levels(bio), fill=cols)
plot(zinb_gc@W, col=cols2[cl], pch=19, xlab="W1", ylab="W2")
legend("topleft", levels(cl), fill=cols2)
df <- data.frame(W1=zinb_gc@W[,1],
W2=zinb_gc@W[,2],
gamma_mu = zinb_gc@gamma_mu[1,],
gamma_pi = zinb_gc@gamma_pi[1,],
detection_rate=detection_rate,
coverage=coverage)
pairs(df, pch=19, col=cols[bio])
print(cor(df, method="spearman"))
## W1 W2 gamma_mu gamma_pi
## W1 1.00000000 -0.54605587 -0.09412048 0.31023239
## W2 -0.54605587 1.00000000 0.12451440 -0.10371060
## gamma_mu -0.09412048 0.12451440 1.00000000 -0.03403039
## gamma_pi 0.31023239 -0.10371060 -0.03403039 1.00000000
## detection_rate -0.42328832 0.04964245 0.17586323 -0.63587802
## coverage -0.06754498 0.03298118 0.58049215 -0.27384906
## detection_rate coverage
## W1 -0.42328832 -0.06754498
## W2 0.04964245 0.03298118
## gamma_mu 0.17586323 0.58049215
## gamma_pi -0.63587802 -0.27384906
## detection_rate 1.00000000 0.52758451
## coverage 0.52758451 1.00000000
df <- data.frame(alphaMu1=zinb_gc@alpha_mu[1,],
alphaMu2=zinb_gc@alpha_mu[2,],
alphaPi1=zinb_gc@alpha_pi[1,],
alphaPi2=zinb_gc@alpha_pi[2,],
gene_detection_rate=gene_detection_rate)
pairs(df, pch=19, col=cols[bio])
print(cor(df, method="spearman"))
## alphaMu1 alphaMu2 alphaPi1 alphaPi2
## alphaMu1 1.00000000 -0.09034811 -0.67360284 0.05224543
## alphaMu2 -0.09034811 1.00000000 0.03646762 -0.62179332
## alphaPi1 -0.67360284 0.03646762 1.00000000 0.01937837
## alphaPi2 0.05224543 -0.62179332 0.01937837 1.00000000
## gene_detection_rate -0.15790047 -0.03702463 0.17796001 0.06782407
## gene_detection_rate
## alphaMu1 -0.15790047
## alphaMu2 -0.03702463
## alphaPi1 0.17796001
## alphaPi2 0.06782407
## gene_detection_rate 1.00000000
We keep intercept in both Vmu and Vpi and add GC and length only in Vmu.
V = cbind(rep(1, NROW(coreGC)), gene_info)
print(system.time(zinb_gc <- zinbFit(coreGC, ncores = 3, K = 2, V = V,
which_V_mu=1:3, which_V_pi=1L)))
## user system elapsed
## 112.998 11.981 52.600
plot(zinb_gc@W, col=cols[bio], pch=19, xlab="W1", ylab="W2")
legend("topleft", levels(bio), fill=cols)
plot(zinb_gc@W, col=cols2[cl], pch=19, xlab="W1", ylab="W2")
legend("topleft", levels(cl), fill=cols2)
df <- data.frame(W1=zinb_gc@W[,1],
W2=zinb_gc@W[,2],
gamma_mu = zinb_gc@gamma_mu[1,],
gamma_pi = zinb_gc@gamma_pi[1,],
detection_rate=detection_rate,
coverage=coverage)
pairs(df, pch=19, col=cols[bio])
print(cor(df, method="spearman"))
## W1 W2 gamma_mu gamma_pi
## W1 1.0000000 -0.56477422 -0.1032456 0.43943455
## W2 -0.5647742 1.00000000 0.1316033 -0.05278191
## gamma_mu -0.1032456 0.13160331 1.0000000 -0.13746261
## gamma_pi 0.4394345 -0.05278191 -0.1374626 1.00000000
## detection_rate -0.4389583 0.06428882 0.1800088 -0.99334074
## coverage -0.0802143 0.02731886 0.5801925 -0.48232309
## detection_rate coverage
## W1 -0.43895825 -0.08021430
## W2 0.06428882 0.02731886
## gamma_mu 0.18000881 0.58019253
## gamma_pi -0.99334074 -0.48232309
## detection_rate 1.00000000 0.52758451
## coverage 0.52758451 1.00000000
df <- data.frame(alphaMu1=zinb_gc@alpha_mu[1,],
alphaMu2=zinb_gc@alpha_mu[2,],
alphaPi1=zinb_gc@alpha_pi[1,],
alphaPi2=zinb_gc@alpha_pi[2,],
gene_detection_rate=gene_detection_rate)
pairs(df, pch=19, col=cols[bio])
print(cor(df, method="spearman"))
## alphaMu1 alphaMu2 alphaPi1 alphaPi2
## alphaMu1 1.00000000 -0.08721723 -0.67039583 0.05977531
## alphaMu2 -0.08721723 1.00000000 0.03998078 -0.62303895
## alphaPi1 -0.67039583 0.03998078 1.00000000 0.01100490
## alphaPi2 0.05977531 -0.62303895 0.01100490 1.00000000
## gene_detection_rate -0.15733394 -0.04386984 0.18069937 0.05793565
## gene_detection_rate
## alphaMu1 -0.15733394
## alphaMu2 -0.04386984
## alphaPi1 0.18069937
## alphaPi2 0.05793565
## gene_detection_rate 1.00000000
We keep intercept in both Vmu and Vpi and add GC and length only in Vpi.
V = cbind(rep(1, NROW(coreGC)), gene_info)
print(system.time(zinb_gc <- zinbFit(coreGC, ncores = 3, K = 2, V = V,
which_V_mu=1L, which_V_pi=1:3)))
## user system elapsed
## 147.383 18.599 106.261
plot(zinb_gc@W, col=cols[bio], pch=19, xlab="W1", ylab="W2")
legend("topleft", levels(bio), fill=cols)
plot(zinb_gc@W, col=cols2[cl], pch=19, xlab="W1", ylab="W2")
legend("topleft", levels(cl), fill=cols2)
df <- data.frame(W1=zinb_gc@W[,1],
W2=zinb_gc@W[,2],
gamma_mu = zinb_gc@gamma_mu[1,],
gamma_pi = zinb_gc@gamma_pi[1,],
detection_rate=detection_rate,
coverage=coverage)
pairs(df, pch=19, col=cols[bio])
print(cor(df, method="spearman"))
## W1 W2 gamma_mu gamma_pi
## W1 1.00000000 0.24532650 0.08201828 0.1390043
## W2 0.24532650 1.00000000 0.36545537 -0.2722286
## gamma_mu 0.08201828 0.36545537 1.00000000 -0.1374051
## gamma_pi 0.13900429 -0.27222859 -0.13740507 1.0000000
## detection_rate -0.14661948 0.43639636 0.33830194 -0.6260981
## coverage -0.04965709 0.06601056 0.82969617 -0.2656031
## detection_rate coverage
## W1 -0.1466195 -0.04965709
## W2 0.4363964 0.06601056
## gamma_mu 0.3383019 0.82969617
## gamma_pi -0.6260981 -0.26560311
## detection_rate 1.0000000 0.52758451
## coverage 0.5275845 1.00000000
df <- data.frame(alphaMu1=zinb_gc@alpha_mu[1,],
alphaMu2=zinb_gc@alpha_mu[2,],
alphaPi1=zinb_gc@alpha_pi[1,],
alphaPi2=zinb_gc@alpha_pi[2,],
gene_detection_rate=gene_detection_rate)
pairs(df, pch=19, col=cols[bio])
print(cor(df, method="spearman"))
## alphaMu1 alphaMu2 alphaPi1 alphaPi2
## alphaMu1 1.0000000000 -0.04339095 -0.60120643 0.05916533
## alphaMu2 -0.0433909522 1.00000000 0.10041003 -0.60652687
## alphaPi1 -0.6012064296 0.10041003 1.00000000 -0.15392199
## alphaPi2 0.0591653284 -0.60652687 -0.15392199 1.00000000
## gene_detection_rate -0.0004593927 0.16423501 0.06517973 -0.17809078
## gene_detection_rate
## alphaMu1 -0.0004593927
## alphaMu2 0.1642350091
## alphaPi1 0.0651797333
## alphaPi2 -0.1780907788
## gene_detection_rate 1.0000000000